{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "import geopandas as gpd\n",
    "import matplotlib.pyplot as plt\n",
    "from shapely.geometry import Point\n",
    "import geopandas as gpd\n",
    "from shapely.geometry import Point\n",
    "import matplotlib.patches as mpatches\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "pop_df=pd.read_csv(r'C:\\Users\\Yasaman\\Downloads\\World_bank_population.csv',skiprows=3)\n",
    "pop_df=pop_df[['Country Code','2019']].dropna()\n",
    "pop_df['2019']=pop_df['2019'].astype(int)\n",
    "possible_countries=pop_df.query(\" `2019` >=1000000\")['Country Code'].values\n",
    "possible_countries=[x.lower() for x in possible_countries]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "excluded_iso3_codes = [\n",
    "    \"IRL\",  # Ireland\n",
    "    \"SSD\",  # South Sudan\n",
    "    \"SDN\",  # Sudan\n",
    "    \"COG\",  # Republic of the Congo\n",
    "    \"COD\",  # Democratic Republic of the Congo\n",
    "    \"GIN\",  # Guinea\n",
    "    \"GNB\",  # Guinea-Bissau\n",
    "    \"GNQ\",  # Equatorial Guinea\n",
    "    \"PNG\",  # Papua New Guinea\n",
    "    \"XKX\",  # Kosovo (unofficial)\n",
    "    \"MNE\",  # Montenegro\n",
    "    \"SRB\",  # Serbia\n",
    "    \"TLS\"   # Timor-Leste\n",
    "]\n",
    "excluded_iso3_codes=[c.lower() for c in excluded_iso3_codes]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "possible_iso=list(set(possible_countries)-set(excluded_iso3_codes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "df=pd.read_csv(r\"C:\\Users\\Yasaman\\Downloads\\Attention-fractional counting.csv\")\n",
    "df=df[df['country'].isin(possible_iso)]\n",
    "df=df.rename(columns={'year':'Year', 'aggregated_value':'count', 'country':'Mention_country', 'affiliation_country':'Aff_country'})\n",
    "Country_list={'Egypt':'EGY', 'Tunisia':'TUN','Libya':'LBY','Syria':'SYR','Yemen':'YEM','Bahrain':'BHR','Jordan':'JOR','Kuwait':'KWT','Morocco':'MAR','Oman':'OMN'}\n",
    "rev_Country_list={Country_list[key]: key for key in Country_list}\n",
    "abbr=[country.lower() for country in Country_list.values()]\n",
    "physical_sciences=[ 'MATH', 'ENGI', 'PHYS', 'COMP']\n",
    "df=df[~df['subjarea'].isin(physical_sciences)]\n",
    "df=df.groupby(['Year', 'Mention_country', 'Aff_country'])['count'].sum().reset_index()\n",
    "    \n",
    "country_codes=pd.read_csv(r\"C:\\Users\\Yasaman\\Downloads\\iso3.csv\")\n",
    "country_codes['iso3']=[c.lower() for c in country_codes['iso3']]\n",
    "Map={country_codes.iloc[c]['iso3']: country_codes.iloc[c]['name'] for c in range(len(country_codes))}\n",
    "Map['irn']='Iran'\n",
    "Map['usa']='USA'\n",
    "Map['gbr']='UK'\n",
    "Map['syr']='Syria'\n",
    "\n",
    "# Define year ranges for \"before\" and \"after\" data.\n",
    "years_before = np.arange(2002, 2011, 1)\n",
    "years_after = np.arange(2011, 2020, 1)\n",
    "\n",
    "\n",
    "\n",
    "# Function to calculate summary statistics for given years and data.\n",
    "def summarize_years(df, years):\n",
    "    # Filter the DataFrame for the given years range.\n",
    "    df_filtered = df[(df['Year'].isin(years))]\n",
    "    # Sum of counts by affiliation country.\n",
    "    summary_df = df_filtered.groupby(['Aff_country','Mention_country'])['count'].sum().reset_index()\n",
    "\n",
    "    return summary_df.fillna(0)  # Fill NaN with 0 if any.\n",
    "\n",
    "# Apply the function to both year ranges.\n",
    "before_df = summarize_years(df, years_before)\n",
    "before_df = before_df.add_suffix('_before')\n",
    "\n",
    "after_df = summarize_years(df, years_after)\n",
    "after_df = after_df.add_suffix('_after')\n",
    "\n",
    "\n",
    "full_df = before_df.merge(after_df, left_on=['Aff_country_before','Mention_country_before'], right_on=['Aff_country_after','Mention_country_after'], how='outer')\n",
    "full_df.fillna(0, inplace=True)\n",
    "full_df['Mention_country'] = np.where(full_df['Mention_country_before'] != 0, full_df['Mention_country_before'], full_df['Mention_country_after'])\n",
    "full_df['Aff_country'] = np.where(full_df['Aff_country_before'] != 0, full_df['Aff_country_before'], full_df['Aff_country_after'])\n",
    "full_df.drop(columns=['Aff_country_before','Mention_country_before','Mention_country_after','Aff_country_after'], inplace=True)\n",
    "\n",
    "result_df=full_df.groupby('Mention_country')[['count_after', 'count_before']].sum().reset_index()\n",
    "\n",
    "result_df['count_after']/=len(years_before)\n",
    "result_df['count_before']/=len(years_before)\n",
    "result_df['Absolute difference']=result_df['count_after']-result_df['count_before']\n",
    "\n",
    "col='Absolute difference'\n",
    "markers=['o', 'v', '^', 'x', 's', 'd', '>', '<', 'p', '+']\n",
    "copy_df = result_df[result_df['Mention_country'].isin(abbr)]\n",
    "copy_df=copy_df.sort_values(by=col, ascending=True).reset_index(drop=True)\n",
    "\n",
    "country_names=['egy','tun', 'lby', 'syr', 'yem', 'bhr', 'jor', 'kwt', 'mar', 'omn']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_df['percentile']=result_df['Absolute difference'].rank(pct=True)\n",
    "copy_df = result_df[result_df['Mention_country'].isin(abbr)]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Mention_country</th>\n",
       "      <th>count_after</th>\n",
       "      <th>count_before</th>\n",
       "      <th>Absolute difference</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>bhr</td>\n",
       "      <td>90.518858</td>\n",
       "      <td>43.729693</td>\n",
       "      <td>46.789166</td>\n",
       "      <td>0.148649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>egy</td>\n",
       "      <td>1685.642039</td>\n",
       "      <td>582.385429</td>\n",
       "      <td>1103.256611</td>\n",
       "      <td>0.810811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>jor</td>\n",
       "      <td>734.036070</td>\n",
       "      <td>349.237039</td>\n",
       "      <td>384.799031</td>\n",
       "      <td>0.581081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>kwt</td>\n",
       "      <td>231.880968</td>\n",
       "      <td>170.589003</td>\n",
       "      <td>61.291965</td>\n",
       "      <td>0.202703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>lby</td>\n",
       "      <td>154.258554</td>\n",
       "      <td>54.661824</td>\n",
       "      <td>99.596730</td>\n",
       "      <td>0.283784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>mar</td>\n",
       "      <td>727.885511</td>\n",
       "      <td>319.661711</td>\n",
       "      <td>408.223799</td>\n",
       "      <td>0.594595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>omn</td>\n",
       "      <td>341.294282</td>\n",
       "      <td>157.796353</td>\n",
       "      <td>183.497929</td>\n",
       "      <td>0.445946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>syr</td>\n",
       "      <td>264.513715</td>\n",
       "      <td>83.770240</td>\n",
       "      <td>180.743475</td>\n",
       "      <td>0.439189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>tun</td>\n",
       "      <td>745.209546</td>\n",
       "      <td>334.979979</td>\n",
       "      <td>410.229566</td>\n",
       "      <td>0.601351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>yem</td>\n",
       "      <td>144.497129</td>\n",
       "      <td>64.257331</td>\n",
       "      <td>80.239798</td>\n",
       "      <td>0.236486</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Mention_country  count_after  count_before  Absolute difference  \\\n",
       "15              bhr    90.518858     43.729693            46.789166   \n",
       "39              egy  1685.642039    582.385429          1103.256611   \n",
       "65              jor   734.036070    349.237039           384.799031   \n",
       "72              kwt   231.880968    170.589003            61.291965   \n",
       "76              lby   154.258554     54.661824            99.596730   \n",
       "81              mar   727.885511    319.661711           408.223799   \n",
       "102             omn   341.294282    157.796353           183.497929   \n",
       "127             syr   264.513715     83.770240           180.743475   \n",
       "134             tun   745.209546    334.979979           410.229566   \n",
       "144             yem   144.497129     64.257331            80.239798   \n",
       "\n",
       "     percentile  \n",
       "15     0.148649  \n",
       "39     0.810811  \n",
       "65     0.581081  \n",
       "72     0.202703  \n",
       "76     0.283784  \n",
       "81     0.594595  \n",
       "102    0.445946  \n",
       "127    0.439189  \n",
       "134    0.601351  \n",
       "144    0.236486  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Mention_country</th>\n",
       "      <th>count_after</th>\n",
       "      <th>count_before</th>\n",
       "      <th>Absolute difference</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>uzb</td>\n",
       "      <td>98.079901</td>\n",
       "      <td>51.355659</td>\n",
       "      <td>46.724242</td>\n",
       "      <td>0.141892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>bhr</td>\n",
       "      <td>90.518858</td>\n",
       "      <td>43.729693</td>\n",
       "      <td>46.789166</td>\n",
       "      <td>0.148649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>dom</td>\n",
       "      <td>107.162135</td>\n",
       "      <td>57.283776</td>\n",
       "      <td>49.878358</td>\n",
       "      <td>0.155405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>nic</td>\n",
       "      <td>133.520587</td>\n",
       "      <td>82.721112</td>\n",
       "      <td>50.799474</td>\n",
       "      <td>0.162162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>prk</td>\n",
       "      <td>79.358443</td>\n",
       "      <td>27.944148</td>\n",
       "      <td>51.414295</td>\n",
       "      <td>0.168919</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>ner</td>\n",
       "      <td>86.314246</td>\n",
       "      <td>33.591342</td>\n",
       "      <td>52.722904</td>\n",
       "      <td>0.175676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>tjk</td>\n",
       "      <td>74.065878</td>\n",
       "      <td>21.092367</td>\n",
       "      <td>52.973510</td>\n",
       "      <td>0.182432</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>mda</td>\n",
       "      <td>77.528582</td>\n",
       "      <td>23.265745</td>\n",
       "      <td>54.262838</td>\n",
       "      <td>0.189189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>blr</td>\n",
       "      <td>112.952868</td>\n",
       "      <td>57.090000</td>\n",
       "      <td>55.862868</td>\n",
       "      <td>0.195946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>kwt</td>\n",
       "      <td>231.880968</td>\n",
       "      <td>170.589003</td>\n",
       "      <td>61.291965</td>\n",
       "      <td>0.202703</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Mention_country  count_after  count_before  Absolute difference  \\\n",
       "141             uzb    98.079901     51.355659            46.724242   \n",
       "15              bhr    90.518858     43.729693            46.789166   \n",
       "36              dom   107.162135     57.283776            49.878358   \n",
       "97              nic   133.520587     82.721112            50.799474   \n",
       "109             prk    79.358443     27.944148            51.414295   \n",
       "95              ner    86.314246     33.591342            52.722904   \n",
       "131             tjk    74.065878     21.092367            52.973510   \n",
       "82              mda    77.528582     23.265745            54.262838   \n",
       "17              blr   112.952868     57.090000            55.862868   \n",
       "72              kwt   231.880968    170.589003            61.291965   \n",
       "\n",
       "     percentile  \n",
       "141    0.141892  \n",
       "15     0.148649  \n",
       "36     0.155405  \n",
       "97     0.162162  \n",
       "109    0.168919  \n",
       "95     0.175676  \n",
       "131    0.182432  \n",
       "82     0.189189  \n",
       "17     0.195946  \n",
       "72     0.202703  "
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df.sort_values('percentile', ascending=True)[20:30]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
